/* 
Creates regression tables and figure in the main paper
except Table 1, which comes from reg_mfx.do

Also includes tables in
(1) Appendix C (estimates shown in Fig 1)
(2) Appendix D, which replicate Tables 2-5 with sc-PDSI 

* follows merge_grid.do
* uses main analysis dataset: for_reg.dta

*/

capture log close
log using reg_main, replace
timer on 1

use for_reg.dta, clear

egen cellid=group(x y)
xtset cellid year

*explicit interactions so can use "absorb"
egen contXyear=group(continent year)

* main specification controls for cubic in temperature
macro define weather "c.dev_tmp##c.dev_tmp##c.dev_tmp"
* to check senstivity to adding precip
*macro define weather "c.dev_tmp##c.dev_tmp##c.dev_tmp c.dev_precip##c.dev_precip##c.dev_precip"

*to allowing looping, create drought vars with comparable names
gen rdsi_cat=dsi_cat11
gen esmrdsi=esmdsi
label values rdsi_cat cat11
label var esmrdsi "Moderate or worse drought (DSI)" 


**# Figure 1 and Appendix Tables C1 and C2 (Full set of drought conditions, no mediators)
foreach v in r p {
xtreg ihslights ib6.`v'dsi_cat if flare==0, fe vce(cluster conpfaf4) absorb(contXyear)
quietly estadd local cellfe "Yes", replace
quietly estadd local contyr "Yes", replace
quietly estadd local tempeff "No", replace
estimates store `v'dsi_basic


**add temperature data 
xtreg ihslights ib6.`v'dsi_cat $weather if flare==0, fe vce(cluster conpfaf4) absorb(contXyear)
quietly estadd local cellfe "Yes", replace
quietly estadd local contyr "Yes", replace
quietly estadd local tempeff "No", replace
estimates store `v'dsi_withtemp

xtreg ihslights esm`v'dsi $weather if flare==0, fe vce(cluster conpfaf4) absorb(contXyear)
quietly estadd local cellfe "Yes", replace
quietly estadd local contyr "Yes", replace
quietly estadd local tempeff "No", replace
estimates store `v'dsi_binary
}


*increase marker size for visibility in mono
coefplot  (rdsi_withtemp, msize(medium) label(DSI)) (pdsi_withtemp, msize(medium) msymbol(Dh) label(sc-PDSI)), base drop(_cons *dev*) xline(0) levels(95) xtitle("Effect on ihs lights")  graphregion(fcolor(white))  rename(*.pdsi_cat = .rdsi_cat)
graph export "../output/Figure1.emf", replace


esttab rdsi_basic rdsi_withtemp rdsi_binary using "../output/tableC1_dsi.rtf", replace se r2(3) sca("cellfe Cell FE" "contyr Continent*year" "tempeff Temperature effects" "N_clust N subbasins" "N_g N cells") obslast nomtitles nobase label star( + .10 * .05 ** .01 ) drop(*dev_*  _cons) order(esmrdsi) /*
*/ title("Appendix Table C1: Impact of Drought Severity Index (DSI) on lights")   /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Excluded DSI normal category in columns 1 and 3 is near normal. Standard errors in parentheses are clustered by cell.")

esttab pdsi_basic pdsi_withtemp pdsi_binary using "../output/tableC2_pdsi.rtf", replace se r2(3) sca("cellfe Cell FE" "contyr Continent*year" "tempeff Temperature effects" "N_clust N subbasins" "N_g N cells") obslast nomtitles nobase label  /// 
star( + .10 * .05 ** .01 ) drop(*dev_* _cons) order(esmpdsi) ///
title("Table 3: Impact of sc-PDSI on nighttime lights")  ///
addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Excluded sc-PDSI  category in columns 1 and 3 is near normal. Standard errors in parentheses are clustered by cell.")


eststo clear


**# Tables 2 and D1: Groundwater interactions 

*Use WB aquifer typology percent variables as discrete
gen alluv=(aqtyp_max=="Major Alluvial") if !missing(aqtyp_max)
gen loc_shal=(aqtyp_max=="Local/Shallow") if !missing(aqtyp_max)
gen complex=(aqtyp_max=="Complex") if !missing(aqtyp_max)
gen karst=(aqtyp_max=="Karstic") if !missing(aqtyp_max)
gen alluv_ls=(alluv | loc_shal) if !missing(aqtyp_max)
label var alluv "Major alluvial"
label var alluv_ls "Alluvial/local/shallow"
label var loc_shal "Local/shallow"
label var complex "Complex"
label var karst "Karstic"

gen ihsgwresource=asinh(resource_norm)
label var ihsgwresource "ihs(Groundwater Resource)"

*interactions with drought
foreach v in dsi pdsi {
foreach var in alluv loc_shal complex karst alluv_ls ihsgwresource {
  local lbl: variable label `var'
  gen esm`v'_`var'=esm`v'*`var'
  label var esm`v'_`var' "`lbl'*drought"
  }	

*GW regressions
eststo: xtreg ihslights esm`v' $weather if flare==0 & !missing(aqtyp_max), fe vce(cluster conpfaf4) absorb(contXyear)
estimates store `v'_basic
eststo: xtreg ihslights esm`v' esm`v'_alluv esm`v'_loc_shal esm`v'_complex $weather if flare==0 & !missing(aqtyp_max), fe vce(cluster conpfaf4) absorb(contXyear)
estimates store `v'_gw
eststo: xtreg ihslights esm`v' esm`v'_alluv esm`v'_loc_shal esm`v'_complex esm`v'_ihsgwresource $weather if flare==0 & !missing(aqtyp_max), fe vce(cluster conpfaf4) absorb(contXyear)
estimates store `v'_gwresource
}


esttab dsi_basic dsi_gw dsi_gwresource using "../output/table2_GW_dsi.rtf", replace se r2(3) sca("N_clust N subbasins" "N_g N cells") obslast label star( + .10 * .05 ** .01 )  drop(*dev_* _cons) title("Table 2: Impact of remote sensed drought on lights mediated by GW") mtitles("No GW" "GW aquifer type" "GW aquifer type & resource") addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by cell. All models include a cubic in temperature, fixed effects for cells, and continent*year effects. Relative to Table 1, these models drop all grid cells for which the maximum groundwater aquifer type was missing or unidentified. The three aquifer type coefficients are all relative to the excluded category, which is "karstic."")
 

esttab pdsi_basic pdsi_gw pdsi_gwresource  using "../output/tableD1_GW_pdsi.rtf", replace se r2(3) sca( "N_clust N subbasins" "N_g N cells") obslast nocon label star( + .10 * .05 ** .01 )  drop(*dev_* _cons) title("Table D1: Impact of drought (PDSI) on lights mediated by GW") mtitles("No GW" "GW aquifer type" "GW aquifer type & resource") addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects. Relative to Table 1, these models drop all grid cells for which the maximum groundwater aquifer type was missing or unidentified. The three aquifer type coefficients are all relative to the excluded category, which is "karstic."")
 
eststo clear


**# Set up data for impacts of dams

**# Table 3: Full spectrum for droughts interacted with dams 
gen pdsi_cat7=pdsi_cat
replace pdsi_cat7=7 if pdsi_cat>=7  & !missing(pdsi_av)
label values pdsi_cat7 cat7


eststo: xtreg ihslights ib6.dsi_cat7##i.ifdam $weather if flare==0, fe vce(cluster conpfaf4) absorb(contXyear)
testparm *dsi_cat7#1.ifdam
estadd scalar Fstat = r(F)
estadd scalar p_val = r(p)
eststo rdsi_dam



esttab rdsi_dam using "../output/table3_dam_droughtcat.rtf", replace se r2(3) sca("Fstat F test (all dam interactions)" "p_val p value"  "N_clust N subbasins" "N_g N cells" ) obslast ///
star( + .10 * .05 ** .01 ) drop(*dev_* _cons 1.ifdam) nobaselevels  label ///
title("Table 3. Effects of local dams for all drought conditions") ///
addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index.  Near normal drought category excluded. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects.")
eststo clear


eststo: xtreg ihslights ib6.pdsi_cat7##i.ifdam $weather if flare==0, fe vce(cluster conpfaf4) absorb(contXyear)
testparm *pdsi_cat7#1.ifdam 
estadd scalar Fstat = r(F)
estadd scalar p_value = r(p)
eststo pdsi_dam

esttab pdsi_dam using "../output/tableD2_dam_droughtcat.rtf", replace se r2(3) sca( "Fstat F test (all dam interactions)"  "p_value p value"  "N_clust N subbasins" "N_g N cells" ) obslast /// 
star( + .10 * .05 ** .01 ) drop(*dev_* 1.ifdam _cons) nobaselevels label  ///
title("Table D1. Effects of local dams for full set of drought conditions using sc-PDSI")  ///
addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index.  Near normal drought category excluded. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects.")
eststo clear

**# Table 4: Interactions of dam characteristics with drought dummy

* characteristics for interactions
replace rescap=rescap/10^3
replace rescaphydro=rescaphydro/10^3
gen farupdam=((upstream_dams-up1_count_dams)>0)
gen pop=pop2000/10^3
label var farupdam "Far upstream dam"
label var pop "Population density"


*explicit interactions for cleaner output
foreach var in ifdam ifhydro pop rescap rescaphydro up1_ifdam up1_ifhydro farupdam {
	local lbl: variable label `var'
*extreme/severe/moderate (esm)
	gen esmdsi_`var'=(dsi_cat11<=3)*`var'
	gen esmpdsi_`var'=(pdsi_cat<=3)*`var'
	label var esmdsi_`var' "`lbl'*drought"
	label var esmpdsi_`var' "`lbl'*drought"
	}

	
*Remoted sensed DSI (moderate or worse drought) 
eststo: xtreg ihslights esmdsi esmdsi_ifdam $weather if flare==0, fe vce(cluster conpfaf4) absorb(contXyear)
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_ifhydro $weather if flare==0, fe vce(cluster conpfaf4)  absorb(contXyear)
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_ifhydro esmdsi_pop $weather if flare==0, fe vce(cluster conpfaf4)  absorb(contXyear)
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_rescap esmdsi_ifhydro esmdsi_rescaphydro $weather if flare==0, fe vce(cluster conpfaf4)  absorb(contXyear)

esttab using "../output/table4_dam_hetero_dsi.rtf", replace se r2(3) sca( "N_clust N subbasins" "N_g N cells") order(esmdsi esmdsi_ifdam esmdsi_rescap esmdsi_ifhydro esmdsi_rescaphydro esmdsi_pop) nomti obslast label noconstant onecell star( + .10 * .05 ** .01 ) drop(*dev_* _cons) /*
*/ title("Table 4. Effects of dams on impacts of moderate-or-worse drought") /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects.")
eststo clear

*sc-PDSI (moderate or worse drought)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam esmpdsi_ifhydro $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam esmpdsi_ifhydro esmpdsi_pop $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam esmpdsi_rescap esmpdsi_ifhydro esmpdsi_rescaphydro $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)

esttab using "../output/tableD3_dam_hetero_pdsi.rtf", replace se r2(3) sca( "N_clust N subbasins" "N_g N cells") order(esmpdsi esmpdsi_ifdam esmpdsi_rescap esmpdsi_ifhydro esmpdsi_rescaphydro esmpdsi_pop) nomti obslast label noconstant onecell star( + .10 * .05 ** .01 ) drop(*dev_* *.year* _cons) /*
*/ title("Table D2. Effects of dams on impacts of drought using sc-PDSI") /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects.")
eststo clear

**# Table 5. Upstream dams

eststo: xtreg ihslights esmdsi esmdsi_ifdam $weather i.year##i.continent if flare==0 & !no_upstream, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_up1_ifdam $weather i.year##i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_up1_ifdam esmdsi_farupdam $weather i.year##i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_ifhydro esmdsi_up1_ifdam esmdsi_up1_ifhydro $weather i.year##i.continent if flare==0, fe vce(cluster conpfaf4)


esttab using "../output/table5_upstream_dams_dsi.rtf", replace se r2(3) sca("N_clust N subbasins" "N_g N cells") ///
   nomti obslast label star( + .10 * .05 ** .01 ) onecell drop(*dev_* *cont* *.year* _cons) order(esmdsi esmdsi_ifdam esmdsi_up1_ifdam esmdsi_farupdam esmdsi_ifhydro esmdsi_up1_ifhydro)/*
*/ title("Table %. Effects of upstream dams on drought") /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Drought index is the DSI. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects. Restricted to areas with at least one upstream subbasin.")
eststo clear

*sc-PDSI, upstream dams
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam $weather i.year#i.continent if flare==0 & !no_upstream, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam esmpdsi_up1_ifdam $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam esmpdsi_up1_ifdam esmpdsi_farupdam $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)
eststo: xtreg ihslights esmpdsi esmpdsi_ifdam esmpdsi_ifhydro esmpdsi_up1_ifdam esmpdsi_up1_ifhydro $weather i.year#i.continent if flare==0, fe vce(cluster conpfaf4)


esttab using "../output/tableD4_upstream_dams_pdsi.rtf", replace se r2(3) sca("N_clust N subbasins" "N_g N cells") ///
   nomti obslast label star( + .10 * .05 ** .01 ) onecell drop(*dev_* *.year* _cons) /*
*/ title("Table D4. Effects of upstream dams on impacts of drought using sc-PDSI") /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects. Restricted to areas with at least one upstream sub-basin.")
eststo clear

**# Table 6. Upstream drought effects


gen up_esmdsi = upstream_drought<-.9 if !no_upstream
label var up_esmdsi "Upstream drought"

gen up_ifdam=(upstream_dams>0)
gen esmdsi_upifdam=esmdsi*up_ifdam
gen up_dam_drought=up_esmdsi*up_ifdam
gen uphydro_drought=up_esmdsi*(upstream_hydro>0)

label var esmdsi_upifdam "Upstream dam*local drought"
label var up_dam_drought "Upstream dam-upstream drought"
label var uphydro_drought "Upstream hydro-upstream drought"

eststo: xtreg ihslights esmdsi up_esmdsi $weather i.year#i.continent if flare==0 & !no_upstream, fe vce(cluster conpfaf4)
test esmdsi=up_esmdsi

eststo: xtreg ihslights esmdsi up_esmdsi esmdsi_ifdam esmdsi_ifhydro up_dam_drought uphydro_drought $weather i.year#i.continent if flare==0 & !no_upstream, fe vce(cluster conpfaf4)

esttab using "../output/table6_upstream_drought.rtf", replace se r2(3) sca("N_clust N subbasins" "N_g N cells") ///
   nomti nonote obslast label star( + .10 * .05 ** .01 ) onecell drop(*dev_* *.year* _cons) /*
*/ title("Table 6. Effects of upstream drought and dams") /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects. Restricted to areas with at least one upstream sub-basin.")
eststo clear


**# Table 7. Irrigation and land use

*Generate shares of cropland that are irrigated --- rather than total land
foreach var in irrig irrig_sw irrig_gw {
gen `var'_crop=`var'/cropland
replace `var'_crop=0 if cropland==0
replace `var'_crop=1 if `var'_crop>1
}
gen nonirrig_crop=cropland-irrig_crop
label var nonirrig_crop "Nonirrigated cropland"
label var irrig_sw_crop "Surface water irrigated cropland"
label var irrig_gw_crop "Groundwater irrigated cropland"

foreach var in urban urbanshare cropland irrig_crop irrig_sw_crop irrig_gw_crop nonirrig_crop {
 gen esmdsi_`var'=esmdsi*`var'	
 local lbl: variable label `var'
 label var esmdsi_`var' "`lbl'*drought"
 }

summarize irrig*crop, detail
 

eststo: xtreg ihslights esmdsi esmdsi_urbanshare esmdsi_cropland $weather if flare==0 , fe vce(cluster conpfaf4) absorb(contXyear)

eststo: xtreg ihslights esmdsi esmdsi_urbanshare esmdsi_irrig_sw_crop esmdsi_irrig_gw_crop esmdsi_nonirrig_crop $weather if flare==0 , fe vce(cluster conpfaf4) absorb(contXyear)

eststo: xtreg ihslights esmdsi esmdsi_urbanshare esmdsi_irrig_sw_crop esmdsi_irrig_gw_crop esmdsi_nonirrig_crop  esmdsi_ifdam esmdsi_ifhydro esmdsi_alluv esmdsi_loc_shal esmdsi_complex esmdsi_ihsgwresource $weather if flare==0 , fe vce(cluster conpfaf4) absorb(contXyear)

esttab using "../output/table7_landuse_irrig.rtf", replace se r2(3) sca("N_clust N subbasins" "N_g N cells") ///
   nomti nonote obslast label star( + .10 * .05 ** .01 ) onecell drop(*dev_* _cons) /*
*/ title("Table 7. Effects of land use and irrigation") /*
*/ addnote("Notes: Dependent variable is inverse hyperbolic sine of nighttime lights index. Standard errors in parentheses are clustered by 4-digit Pfafstetter subbasin. All models include a cubic in temperature, fixed effects for cells, and continent*year effects.")
eststo clear


** Table 8. Country effects


**# Create explicit dataset of country-year effects
*Note: country-year effects xtreg takes a long time, so effects saved in country_year_effect.dta to save time once run

egen country_year_id = group(country year)

*set up file to merge country-year effects back into with identifiers
preserve
collapse (firstnm) country year, by(country_year_id)
tempfile id_map
save `id_map'
restore

* run with explicity country-year coefficients
quietly: xtreg ihslights i.esmdsi##i.ifdam i.esmdsi#i.ifhydro $weather i.country_year_id if flare==0, fe vce(cluster country)  


*  Prepare file to make each result a different obs
tempfile results_file
postfile results_post country_year_id_num country_year_effect using `results_file', replace

* Loop through each unique country-year ID and save the coefficient.
quietly: levelsof country_year_id, local(ids)
foreach id of local ids {
    local coeff_value = _b[`id'.country_year_id]
    post results_post (`id') (`coeff_value')
}
postclose results_post

* Load the results and merge with the country/year identifiers.
use `results_file', clear
rename country_year_id_num country_year_id

merge 1:1 country_year_id using `id_map'

drop if _merge!=3
drop _merge country_year_id


save "../data/country_year_effects.dta", replace


**#  Table 8. Country heterogeneity

use for_reg, clear

merge m:1 country year using "../data/country_year_effects.dta"
drop if _merge==2
drop _merge

egen cellid=group(x y)
xtset cellid year

foreach var in ifdam ifhydro  {
	local lbl: variable label `var'
*extreme/severe/moderate
	gen esmdsi_`var'=(dsi_cat11<=3)*`var'
	gen esmpdsi_`var'=(pdsi_cat<=3)*`var'
	label var esmdsi_`var' "`lbl'*drought"
	label var esmpdsi_`var' "`lbl'*drought"
	}


bysort country year: egen esmdsi_country=mean(esmdsi)
label var esmdsi_country "Country average drought"
* Statistic reported in table about frequency of country=-level drought
table esmdsi, stat(mean esmdsi_country) stat(median esmdsi_country)


egen countryXyear=group(country year)
egen contXyear=group(cont year)

** country year effects
eststo: xtreg ihslights esmdsi esmdsi_ifdam esmdsi_ifhydro $weather if flare==0, fe vce(cluster country)  absorb(countryXyear)
quietly estadd local c_y  "Yes", replace
quietly estadd local ct_y "No", replace 


** explain country year effects
eststo: xtreg country_year_effect esmdsi esmdsi_ifdam esmdsi_ifhydro $weather if flare==0, fe vce(cluster country) absorb(contXyear)
quietly estadd local c_y  "No", replace
quietly estadd local ct_y "Yes", replace

** country level drought
eststo: xtreg ihslights esmdsi esmdsi_country esmdsi_ifdam esmdsi_ifhydro $weather if flare==0, fe vce(cluster country)  absorb(contXyear)
quietly estadd local c_y  "No", replace
quietly estadd local ct_y "Yes", replace



esttab using "../output/table8_country.rtf", replace se r2(3) sca("c_y Country*year effects" "ct_y Continent*year effects" "N_clust N countries" "N_g N cells") nomti nonote obslast label noconstant onecell star( + .10 * .05 ** .01 ) drop(*dev_tmp*  _cons)  /*
*/ title("Table 8. Drought and dam impacts, country-year effects") /*
*/ addnote("Notes: Standard errors in parentheses are clustered by country. All models include a cubic in temperature and fixed effects for cells.")
 eststo clear
 
timer off 1
timer list 1
log close

timer clear 1 